In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Modern CNN Architectures - Lab 1

Overview

In this lab, you will try to improve on a mini-VGG for CIFAR-10.

If you tried to train a full VGG16 or VGG19 on CIFAR-10, it would not learn. This is because the CIFAR-10 input size is very small (32x32), so by the time you reach the bottleneck layer (the last convolution before flattening), the feature maps are only 1x1 pixels -- and thus carry no spatial information. Conventional practice holds that 4x4 is ideal, though 3x3 generally works.
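
A quick way to convince yourself: each of VGG's five 2x2 max-pooling stages halves the spatial size. A minimal sketch (not part of the original lab) of that arithmetic:

In [ ]:
# Feature map size after each of VGG's five max-pooling stages,
# starting from CIFAR-10's 32x32 input.
size = 32
for stage in range(1, 6):
    size //= 2  # each 2x2 max pool with stride 2 halves the spatial size
    print(f"after pooling stage {stage}: {size}x{size}")
# prints 16x16, 8x8, 4x4, 2x2, 1x1 -- no spatial information left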

Below is a composable "class"-based version for building VGG networks. Spend a few moments looking at the structure to get familiar with it.


In [ ]:
# VGG (16 and 19, composable) (2014)
# Paper: https://arxiv.org/pdf/1409.1556.pdf

import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

class VGG(object):
    """ VGG (composable)
    """
    # Meta-parameter: per group, (number of conv layers, number of filters)
    groups = { 16 : [ (1, 64), (2, 128), (3, 256), (3, 512), (3, 512) ],   # VGG16
               19 : [ (1, 64), (2, 128), (4, 256), (4, 512), (4, 512) ] }  # VGG19

    init_weights='glorot_uniform'
    _model = None
 

    def __init__(self, n_layers, input_shape=(224, 224, 3), n_classes=1000):
        """ Construct a VGG model
            n_layers    : number of layers (16 or 19)
            input_shape : input shape to the model
            n_classes   : number of output classes
        """
        if n_layers not in [16, 19]:
            raise ValueError("VGG: Invalid value for n_layers")
            
        # The input vector 
        inputs = Input( input_shape )

        # The stem group
        x = self.stem(inputs)

        # The learner
        x = self.learner(x, self.groups[n_layers])

        # The classifier
        outputs = self.classifier(x, n_classes)

        # Instantiate the Model
        self._model = Model(inputs, outputs)

    @property
    def model(self):
        return self._model

    @model.setter
    def model(self, _model):
        self._model = _model
    
    def stem(self, inputs):
        """ Construct the Stem Convolutional Group
            inputs : the input vector
        """
        x = Conv2D(64, (3, 3), strides=(1, 1), padding="same", activation="relu",
                   kernel_initializer=self.init_weights)(inputs)
        return x
    
    def learner(self, x, blocks):
        """ Construct the (Feature) Learner
            x        : input to the learner
            blocks   : list of groups: (number of conv layers, number of filters)
        """ 
        # The convolutional groups
        for n_layers, n_filters in blocks:
            x = self.group(x, n_layers, n_filters)
        return x

    @staticmethod
    def group(x, n_layers, n_filters, init_weights=None):
        """ Construct a Convolutional Group
            x        : input to the group
            n_layers : number of convolutional layers
            n_filters: number of filters
        """
        if init_weights is None:
            init_weights = VGG.init_weights
        # Block of convolutional layers
        for n in range(n_layers):
            x = Conv2D(n_filters, (3, 3), strides=(1, 1), padding="same", activation="relu",
                       kernel_initializer=init_weights)(x)
        
        # Max pooling at the end of the block
        x = MaxPooling2D(2, strides=(2, 2))(x)
        return x
    
    def classifier(self, x, n_classes):
        """ Construct the Classifier
            x         : input to the classifier
            n_classes : number of output classes
        """
        # Flatten the feature maps
        x = Flatten()(x)
    
        # Two fully connected dense layers
        x = Dense(4096, activation='relu', kernel_initializer=self.init_weights)(x)
        x = Dense(4096, activation='relu', kernel_initializer=self.init_weights)(x)

        # Output layer for classification 
        x = Dense(n_classes, activation='softmax', kernel_initializer=self.init_weights)(x)
        return x

# Example of constructing a VGG 16
# vgg = VGG(16)
# model = vgg.model
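
To sanity-check the class after running the cell above, one minimal usage (a sketch; building the full model takes a moment) is to construct a VGG16 and confirm the parameter count matches the canonical figure of about 138 million:

In [ ]:
# Build a VGG16 with the default 224x224x3 input and 1000 classes,
# then verify the parameter count (138,357,544 for the canonical VGG16).
vgg = VGG(16)
print(f"{vgg.model.count_params():,}")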

Starting mini-VGG

Below is a mini-VGG I wrote for CIFAR-10. Notice how at the bottleneck (after the final max pooling) the feature maps are 7 x 7.

Model Summary

Layer (type)                 Output Shape              Param #   
=================================================================
input_4 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 28, 28, 32)        2432      
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_28 (Conv2D)           (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 14, 14, 128)       147584    
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 7, 7, 128)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                62730     
=================================================================
Total params: 305,098
Trainable params: 305,098
Non-trainable params: 0

Training

Below are the results of training for 10 epochs. Notice how the validation accuracy plateaus around 75% and then slips back, while the training accuracy climbs past 92% and the validation loss rises after epoch 6 -- the model is overfitting to the training data.

Train on 45000 samples, validate on 5000 samples
Epoch 1/10
45000/45000 [==============================] - 73s 2ms/sample - loss: 1.4581 - acc: 0.4728 - val_loss: 1.1139 - val_acc: 0.6030
Epoch 2/10
45000/45000 [==============================] - 77s 2ms/sample - loss: 0.9879 - acc: 0.6559 - val_loss: 0.8826 - val_acc: 0.6948
Epoch 3/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.7916 - acc: 0.7264 - val_loss: 0.8561 - val_acc: 0.7152
Epoch 4/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.6645 - acc: 0.7689 - val_loss: 0.7758 - val_acc: 0.7362
Epoch 5/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.5571 - acc: 0.8058 - val_loss: 0.7687 - val_acc: 0.7568
Epoch 6/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.4691 - acc: 0.8349 - val_loss: 0.7511 - val_acc: 0.7558
Epoch 7/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.3811 - acc: 0.8669 - val_loss: 0.8617 - val_acc: 0.7520
Epoch 8/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.3132 - acc: 0.8897 - val_loss: 0.9241 - val_acc: 0.7468
Epoch 9/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.2583 - acc: 0.9076 - val_loss: 1.0457 - val_acc: 0.7438
Epoch 10/10
45000/45000 [==============================] - 83s 2ms/sample - loss: 0.2174 - acc: 0.9221 - val_loss: 1.1191 - val_acc: 0.7428

Try to Improve

How could we improve this?

  1. Perhaps adding regularization (dropout)?

  2. Perhaps adding batch normalization between the two VGG groups?

  3. Perhaps adding a squeeze (dimensionality reduction) group, so the model has fewer parameters to train?

  4. Perhaps adding another VGG group with a doubling of filters (dimensionality expansion), so the model has more parameters to train?

If this is a classroom setting, we will split into four teams, and each team will try a different approach. Sketches of what each approach might look like follow the training cell at the end of this notebook.


In [ ]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense

# Stem
inputs = Input((32, 32, 3))
# 5x5 valid convolution: 32x32x3 -> 28x28x32, matching the summary above
x = Conv2D(32, (5, 5), strides=1, padding='valid', activation='relu')(inputs)

# Learner
# VGG group: 1 conv layer, 64 filters
# VGG group: 2 conv layers, 128 filters
x = VGG.group(x, 1, 64)
x = VGG.group(x, 2, 128)


# Classifier
x = Flatten()(x)
outputs = Dense(10, activation='softmax')(x)
model = Model(inputs, outputs)
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

In [ ]:
# Train the model on CIFAR-10
from tensorflow.keras.datasets import cifar10
import numpy as np
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = (x_train / 255.0).astype(np.float32)
x_test  = (x_test  / 255.0).astype(np.float32)
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1)
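
Below is one possible take on approaches 1 and 2 -- a sketch, not the lab's prescribed answer. It reuses VGG.group from the composable class above; the helper name mini_vgg, the dropout rate of 0.5, and the placement of the batch normalization between the two groups are all assumptions, so adjust them freely.

In [ ]:
# Sketch for approaches 1 and 2 (one interpretation, not the official answer).
# Rebuild the mini-VGG with optional dropout and/or batch normalization.
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import (Conv2D, Flatten, Dense, Dropout,
                                     BatchNormalization)

def mini_vgg(dropout=False, batch_norm=False):
    inputs = Input((32, 32, 3))
    x = Conv2D(32, (5, 5), strides=1, padding='valid', activation='relu')(inputs)
    x = VGG.group(x, 1, 64)
    if batch_norm:
        x = BatchNormalization()(x)  # approach 2: normalize between the two groups
    x = VGG.group(x, 2, 128)
    x = Flatten()(x)
    if dropout:
        x = Dropout(0.5)(x)          # approach 1: regularize the classifier
    outputs = Dense(10, activation='softmax')(x)
    model = Model(inputs, outputs)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                  metrics=['acc'])
    return model

# Team 1: model = mini_vgg(dropout=True)
# Team 2: model = mini_vgg(batch_norm=True)
# Then train with the same model.fit(...) call as in the training cell above.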

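And one possible take on approaches 3 and 4, under the same caveat: the 1x1 squeeze convolution down to 32 feature maps, the 2-layer 256-filter group, and the helper names are assumptions, not the official solution.

In [ ]:
# Sketch for approaches 3 and 4 (one interpretation, not the official answer).
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense

def mini_vgg_squeeze():
    """ Approach 3: a 1x1 squeeze convolution before the classifier
        reduces the feature maps from 7x7x128 to 7x7x32, so the Dense
        layer has far fewer parameters to train.
    """
    inputs = Input((32, 32, 3))
    x = Conv2D(32, (5, 5), strides=1, padding='valid', activation='relu')(inputs)
    x = VGG.group(x, 1, 64)
    x = VGG.group(x, 2, 128)
    x = Conv2D(32, (1, 1), activation='relu')(x)  # squeeze: 128 -> 32 feature maps
    x = Flatten()(x)
    outputs = Dense(10, activation='softmax')(x)
    return Model(inputs, outputs)

def mini_vgg_expanded():
    """ Approach 4: one more VGG group with a doubling of filters (256);
        the extra max pooling takes the bottleneck from 7x7 down to 3x3.
    """
    inputs = Input((32, 32, 3))
    x = Conv2D(32, (5, 5), strides=1, padding='valid', activation='relu')(inputs)
    x = VGG.group(x, 1, 64)
    x = VGG.group(x, 2, 128)
    x = VGG.group(x, 2, 256)  # dimensionality expansion: 128 -> 256 filters
    x = Flatten()(x)
    outputs = Dense(10, activation='softmax')(x)
    return Model(inputs, outputs)

# Team 3: model = mini_vgg_squeeze()
# Team 4: model = mini_vgg_expanded()
# Compile and train with the same compile/fit calls as above.
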
In [ ]:


In [ ]: